{
    "cells": [
        {
            "cell_type": "markdown",
            "id": "5d974136",
            "metadata": {},
            "source": [
                "# Faiss Demo"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "4026b434",
            "metadata": {},
            "outputs": [],
            "source": [
                "import logging\n",
                "import sys\n",
                "\n",
                "# force=True replaces any handlers already attached to the root logger, so each\n",
                "# record is written to stdout exactly once, even when this cell is re-run.\n",
                "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "b541d8ec",
            "metadata": {},
            "outputs": [],
            "source": [
                "import numpy as np\n",
                "from gpt_index import GPTListIndex\n",
                "from gpt_index.readers.faiss import FaissReader\n",
                "from IPython.display import Markdown, display"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "90d37078",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Build the Faiss index.\n",
                "# A guide for how to get started with Faiss is here: https://github.com/facebookresearch/faiss/wiki/Getting-started\n",
                "# The toy data below keeps this notebook runnable end to end;\n",
                "# replace it with your own embeddings and texts.\n",
                "\n",
                "import faiss\n",
                "\n",
                "# Dimensionality of every document and query embedding.\n",
                "d = 8\n",
                "\n",
                "# Faiss works on float32 vectors, so set the dtype explicitly\n",
                "# (NumPy would otherwise default to float64).\n",
                "docs = np.array(\n",
                "    [\n",
                "        [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n",
                "        [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],\n",
                "        [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],\n",
                "        [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],\n",
                "        [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
                "    ],\n",
                "    dtype=np.float32,\n",
                ")\n",
                "\n",
                "# id_to_text_map is used for query retrieval. Keys are the integer ids Faiss\n",
                "# assigns to vectors in insertion order (0, 1, 2, ...), i.e. the row index in docs.\n",
                "id_to_text_map = {\n",
                "    0: \"aaaaaaaaa bbbbbbb cccccc\",\n",
                "    1: \"foooooo barrrrrr\",\n",
                "    2: \"tmp tmptmp tmp\",\n",
                "    3: \"hello world hello world\",\n",
                "    4: \"cat dog cat dog\",\n",
                "}\n",
                "\n",
                "# Build the index: exact L2 search over the document vectors.\n",
                "index = faiss.IndexFlatL2(d)\n",
                "index.add(docs)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "fd470a09",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Wrap the Faiss index in a reader that can turn search hits into documents.\n",
                "reader = FaissReader(index)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "c33084c5",
            "metadata": {},
            "outputs": [],
            "source": [
                "# To load data from the Faiss index, you must specify:\n",
                "# k: number of nearest neighbors to retrieve per query\n",
                "# query: a 2D array of query embeddings (one row per query)\n",
                "k = 4\n",
                "\n",
                "# Example queries. Each vector must have the same dimensionality as the indexed\n",
                "# vectors (the example data uses 8) and be float32, which is what Faiss works on.\n",
                "query1 = np.array([0.15, 0.15, 0.15, 0.15, 0.15, 0.15, 0.15, 0.15], dtype=np.float32)\n",
                "query2 = np.array([0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45], dtype=np.float32)\n",
                "query = np.array([query1, query2])\n",
                "\n",
                "documents = reader.load_data(query=query, id_to_text_map=id_to_text_map, k=k)"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "0b74697a",
            "metadata": {},
            "source": [
                "### Create a list index from the retrieved documents"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "e85d7e5b",
            "metadata": {},
            "outputs": [],
            "source": [
                "index = GPTListIndex.from_documents(documents)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "31c3b68f",
            "metadata": {},
            "outputs": [],
            "source": [
                "# set Logging to DEBUG for more detailed outputs\n",
                "response = index.query(\"<query_text>\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "56fce3fb",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Show the query response as bold Markdown.\n",
                "display(\n",
                "    Markdown(f\"<b>{response}</b>\")\n",
                ")"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3 (ipykernel)",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.11.1"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}